writeLines(capture.output(devtools::session_info()), "sessionInfo.txt")
cleaned_data_w_age <- read_excel("data/20230424_nordsjoen_stepan.xlsx", skip = 1) %>%
clean_names() %>%
select(species, year, alder_y, length_cm, fettinnhold_percent, lsi_percent, hel_vekt_g, lever_vekt_g, matches("w_w")) %>%
drop_na(species, year, length_cm, fettinnhold_percent, lsi_percent) %>%
mutate(across(.cols = -species, .fn = ~ as.numeric(.x))) %>%
# log transform response variables and covariate length
mutate(across(.cols = matches("w_w"), .fn = ~ log10(as.numeric(.x) + 1))) %>%
mutate(across(.cols = matches("length_cm"), .fn = ~ log10(as.numeric(.x)))) %>%
mutate(across(.cols = matches("fettinnhold_percent|year|lsi|vekt_g|alder"), .fn = ~ as.numeric(.x))) %>%
rename_with(
~ str_replace_all(.x, c(
"w_w" = "ww",
"^s" = "",
"fettinnhold_percent" = "fat_pc",
"lsi_percent" = "lsi_pc"
)) %>%
str_to_lower(),
.cols = !matches("species")
) %>%
# remove obs with no POP data
filter(if_any(matches("ww"), ~ !is.na(.x))) %>%
pivot_longer(cols = matches("ww"), names_to = "variable", values_to = "value") %>%
# update two values found to be incorrectly entered
mutate(
hel_vekt_g = if_else(hel_vekt_g == 635 & lever_vekt_g == 151.9, 6135, hel_vekt_g),
lsi_pc = if_else(hel_vekt_g == 6135 & lever_vekt_g == 151.9, lever_vekt_g / hel_vekt_g, lsi_pc),
hel_vekt_g = if_else(hel_vekt_g == 1004 & lever_vekt_g == 780.0, 10040, hel_vekt_g),
lsi_pc = if_else(hel_vekt_g == 10040 & lever_vekt_g == 780.0, lever_vekt_g / hel_vekt_g, lsi_pc),
)
raw_data <- read_excel("data/20230424_nordsjoen_stepan.xlsx", skip = 1) %>%
clean_names() %>%
select(species, year, length_cm, fettinnhold_percent, lsi_percent, hel_vekt_g, lever_vekt_g, matches("w_w")) %>%
drop_na(species, year, length_cm, fettinnhold_percent, lsi_percent) %>%
mutate(across(.cols = -species, .fn = ~ as.numeric(.x)))
cleaned_data <- raw_data %>%
# log transform response variables and covariate length
mutate(across(.cols = matches("w_w"), .fn = ~ log10(as.numeric(.x) + 1))) %>%
mutate(across(.cols = matches("length_cm"), .fn = ~ log10(as.numeric(.x)))) %>%
mutate(across(.cols = matches("fettinnhold_percent|year|lsi|vekt_g"), .fn = ~ as.numeric(.x))) %>%
rename_with(
~ str_replace_all(.x, c(
"w_w" = "ww",
"^s" = "",
"fettinnhold_percent" = "fat_pc",
"lsi_percent" = "lsi_pc"
)) %>%
str_to_lower(),
.cols = !matches("species")
) %>%
# remove obs with no POP data
filter(if_any(matches("ww"), ~ !is.na(.x))) %>%
pivot_longer(cols = matches("ww"), names_to = "variable", values_to = "value") %>%
# update two values found to be incorrectly entered
mutate(
hel_vekt_g = if_else(hel_vekt_g == 635 & lever_vekt_g == 151.9, 6135, hel_vekt_g),
lsi_pc = if_else(hel_vekt_g == 6135 & lever_vekt_g == 151.9, lever_vekt_g / hel_vekt_g, lsi_pc),
hel_vekt_g = if_else(hel_vekt_g == 1004 & lever_vekt_g == 780.0, 10040, hel_vekt_g),
lsi_pc = if_else(hel_vekt_g == 10040 & lever_vekt_g == 780.0, lever_vekt_g / hel_vekt_g, lsi_pc),
)
cleaned_data %>%
group_by(species, variable, year) %>%
summarise(n_obs = n()) %>%
ungroup() %>%
select(-variable) %>%
distinct(species, year, .keep_all = TRUE) %>% pivot_wider(names_from = "species", values_from = "n_obs") %>%
gt(
caption = "Samples per year per species.",
groupname_col = "species",
rowname_col = "year"
)
| Cod | Haddock | Saithe | |
|---|---|---|---|
| 2005 | 50 | 50 | 50 |
| 2008 | 46 | 50 | 49 |
| 2011 | 51 | 49 | NA |
| 2013 | 50 | 50 | 48 |
| 2016 | 50 | 44 | 49 |
| 2019 | 9 | 41 | 50 |
| 2010 | NA | 50 | 50 |
To investigate if there was a time trend for any contaminants in the three gadoid species, one identical model was chosen and fitted to each different dataset (3 species x 6 contaminants = 18). Potential confounders were fish age or proxies such as fish length and fish weight as well as liver weight, liver fat content and hepatosomatic or liver somatic index (LSI). It was known that effect of these variables will differ between species, owing to their distinct physiology and feeding habits. Hence, a model was fitted to each species as comparisons between different fish species were not a priority. Due to collinearity with LSI and fish length, weight of liver and fish was omitted. Fish age was omitted due to missingness of a substantial fraction of observations and collinearity with fish length. As such, a multiple linear model including explanatory variables year, fish length, fat percentage and LSI was chosen. Two outliers were identified in LSI. Upon further investigation, it was determined that these values were the result of typographic errors during data collection. These errors were corrected during the data cleaning. Additional 3 cod samples in 2019 missed values for fat percentage and were not included for the statistical analyses.
# linear regression model
pop_lm <- function(in_dat) {
lm(
data = in_dat, formula = value ~ year + length_cm + fat_pc + lsi_pc,
na.action = na.exclude
)
}
# conditional partial residuals manually calculated
p_man <- cleaned_data %>%
ungroup() %>%
group_by(species, variable) %>%
drop_na(value) %>%
nest() %>%
# apply model and calculate medians of covariates
mutate(
model = map(data, pop_lm),
med_length_cm = map(
data,
~ median(.x$length_cm, na.rm = TRUE)
) %>% as.numeric(),
med_fat_pc = map(
data,
~ median(.x$fat_pc, na.rm = TRUE)
) %>% as.numeric(),
med_lsi_pc = map(
data,
~ median(.x$lsi_pc, na.rm = TRUE)
) %>% as.numeric(),
mean_length_cm = map(
data,
~ mean(.x$length_cm, na.rm = TRUE)
) %>% as.numeric(),
mean_fat_pc = map(
data,
~ mean(.x$fat_pc, na.rm = TRUE)
) %>% as.numeric(),
mean_lsi_pc = map(
data,
~ mean(.x$lsi_pc, na.rm = TRUE)
) %>% as.numeric(),
mean_year = map(
data,
~ mean(.x$year, na.rm = TRUE)
) %>% as.numeric(),
mean_value = map(
data,
~ mean(.x$value, na.rm = TRUE)
) %>% as.numeric()
) %>%
mutate(
glance = map(model, broom::glance),
year_pval = map(model, ~ broom::tidy(.x)[[2, 5]] %>% as.numeric()),
year_estimate = map(model, ~ broom::tidy(.x)[[2, 2]] %>% as.numeric() %>% {10^. - 1}),
year_estimate_nolog = map(model, ~ broom::tidy(.x)[[2, 2]] %>% as.numeric()),
# extracting model coefficients for partial residual calculation
tidy_coeff = map(model, ~ broom::tidy(.x) %>%
select(term, estimate) %>%
pivot_wider(names_from = "term", values_from = "estimate") %>%
clean_names() %>%
rename_with(.cols = everything(), .fn = ~ paste0(.x, "_coeff"))),
# tidy gets summary variables of the model coefficients
tidy = map(model, ~ broom::tidy(.x)),
# augment gets per observation variables (e.g. residuals)
augment = map2(model, data, ~ broom::augment(.x, .y)),
partial_resids = map(model, ~ residuals(.x, type = "partial") %>%
as_tibble() %>%
rename_with(.cols = everything(), .fn = ~ paste0(.x, "_partial_resid"))),
normal_resids = map(model, ~ residuals(.x) %>%
as_tibble() %>%
rename_with(.cols = everything(), .fn = ~ paste0(.x, "_normal_resid")))
) %>%
# add package-generated plots for an extra check
mutate(
visreg = map(
model,
~ visreg(.x, "year", gg = TRUE, type = "conditional")
),
effect_plot = map(
model,
~ effect_plot(.x, pred = year, interval = TRUE, partial.residuals = TRUE)
),
variable = toupper(variable) %>% str_replace_all("_WW", ""),
) %>%
# vifs to check for collinearity
mutate(
vifs = map(model, ~ car::vif(.x) %>% as_tibble())
)
#Residuals vs fitted
p_man %>% unnest(augment) %>%
ggplot(aes(.fitted, .resid)) +
geom_point(size = 0.2) +
geom_hline(yintercept = 0, color = "red") +
geom_smooth(method = "loess", formula = y ~ x, se = T, span = 2)+
facet_grid(species~variable, scales = "free")
Some non-linear patterns observed e.g for HCB in haddock and cod, yet patterns are not systematic across species and contaminant. Thus, the current model was retained.
#Residuals vs each covariate
p_man %>% unnest(augment) %>%
ggplot(aes(length_cm, .resid)) +
geom_point(size = 0.2) +
geom_hline(yintercept = 0, color = "red") +
geom_smooth(method = "loess", formula = y ~ x, se = T, span = 2)+
facet_grid(species~variable, scales = "free")
p_man %>% unnest(augment) %>%
ggplot(aes(fat_pc, .resid)) +
geom_point(size = 0.2) +
geom_hline(yintercept = 0, color = "red") +
geom_smooth(method = "loess", formula = y ~ x, se = T, span = 2)+
facet_grid(species~variable, scales = "free")
p_man %>% unnest(augment) %>%
ggplot(aes(lsi_pc, .resid)) +
geom_point(size = 0.2) +
geom_hline(yintercept = 0, color = "red") +
geom_smooth(method = "loess", formula = y ~ x, se = T, span = 2)+
facet_grid(species~variable, scales = "free")
ggplot(p_man %>% unnest(augment), aes(sample = value)) +
stat_qq(size = 0.2) +
stat_qq_line(color = "red", size = 0.4) +
facet_grid(species ~ variable, scales = "free")
ggplot(p_man %>% unnest(augment), aes(value)) +
geom_histogram(aes(y=..density..), color='gray50',
alpha=0.2, position = "identity")+
geom_density(alpha=0.2, fill = "red")+
facet_grid(species ~ variable, scales = "free")
Deviation from normality is observed and p-values should be interpreted with caution. Bimodality observed for HCH could not be linked to sampling location.
p_man %>%
unnest(augment) %>%
ggplot(aes(x = .fitted, y = sqrt(abs(.std.resid)))) +
geom_point(size = 0.2) +
geom_smooth(method = "loess", formula = y ~ x, span = 2, se = T, size = 0.5) +
geom_smooth(method = "lm", formula = y ~ 1, se = FALSE, color = "red", size = 0.5, linetype="dashed", span = 2)+
facet_grid(species ~ variable, scales = "free")
No clear systematic patterns in the variance versus fitted value is observed, most showing relatively homoskedastic behavior.
# year
cleaned_data %>%
select(-variable, -value, -hel_vekt_g, -lever_vekt_g) %>%
mutate(across(!matches("species"), ~ as.numeric(.x))) %>%
pivot_longer(cols = -c("species", "year"), names_to = "variable", values_to = "value") %>%
ggplot(aes(as_factor(year), value)) +
geom_point() + geom_boxplot()+
stat_summary(fun = "mean", geom = "point", color = "red", size = 3, shape = 3)+
theme(
axis.text.x = element_text(angle = 90, hjust = 1)
)+
ggh4x::facet_grid2(species ~ variable, scales = "free_y", independent = "y")
# # fat_pc
# cleaned_data %>%
# select(-variable, -value, -hel_vekt_g, -lever_vekt_g) %>%
# mutate(across(!matches("species"), ~ as.numeric(.x))) %>%
# pivot_longer(cols = -c("species", "year"), names_to = "variable", values_to = "value") %>%
# ggplot(aes(as_factor(year), value)) +
# geom_point() + geom_boxplot()+
# theme(
# axis.text.x = element_text(angle = 90, hjust = 1)
# )+
# ggh4x::facet_grid2(species ~ variable, scales = "free_y", independent = "y")
#
# # lsi_pc
# cleaned_data %>%
# select(-variable, -value, -hel_vekt_g, -lever_vekt_g) %>%
# mutate(across(!matches("species"), ~ as.numeric(.x))) %>%
# pivot_longer(cols = -c("species", "year"), names_to = "variable", values_to = "value") %>%
# ggplot(aes(as_factor(year), value)) +
# geom_point() + geom_boxplot()+
# theme(
# axis.text.x = element_text(angle = 90, hjust = 1)
# )+
# ggh4x::facet_grid2(species ~ variable, scales = "free_y", independent = "y")
#
# # length_cm
# cleaned_data %>%
# select(-variable, -value, -hel_vekt_g, -lever_vekt_g) %>%
# mutate(across(!matches("species"), ~ as.numeric(.x))) %>%
# pivot_longer(cols = -c("species", "year"), names_to = "variable", values_to = "value") %>%
# ggplot(aes(as_factor(year), value)) +
# geom_point() + geom_boxplot()+
# theme(
# axis.text.x = element_text(angle = 90, hjust = 1)
# )+
# ggh4x::facet_grid2(species ~ variable, scales = "free_y", independent = "y")
VIFs were determined to be below 3 for each dependent variable. However, some collinearity e.g. a decrease in LSI for haddock with year due to fish weight/size increase is observed.
# cleaned_data %>%
# select(-variable, -value) %>%
# mutate(across(!matches("species"), ~ as.numeric(.x))) %>%
# pivot_longer(cols = -"species", names_to = "variable", values_to = "value") %>%
# ggplot(aes(value)) +
# geom_histogram(bins = 50) +
# facet_grid(species ~ variable, scales = "free")+ theme(
# axis.text.x = element_text(angle = 90, hjust = 1)
# )
#
# cleaned_data %>%
# select(-variable, -value) %>%
# mutate(across(!matches("species"), ~ as.numeric(.x))) %>%
# pivot_longer(cols = -"species", names_to = "variable", values_to = "value") %>%
# ggplot(aes(value)) + geom_boxplot() + facet_grid(species ~ variable, scales = "free")+ theme(
# axis.text.x = element_text(angle = 90, hjust = 1)
# )
Two outliers were identified in LSI. Upon further investigation, it was determined that these values were the result of typographic errors during data collection. These errors were corrected during the data cleaning above.
pcb_hcb <- read_excel("data/20230907_Datasettet med PCB6.xlsx", skip = 1) %>%
clean_names() %>%
select(species, year, alder_y, length_cm, fettinnhold_percent, lsi_percent, hel_vekt_g, lever_vekt_g, matches("w_w|pcb")) %>%
drop_na(species, year, length_cm, fettinnhold_percent, lsi_percent) %>%
mutate(across(.cols = -species, .fn = ~ as.numeric(.x))) %>%
mutate(across(.cols = matches("length_cm"), .fn = ~ as.numeric(.x))) %>%
mutate(across(.cols = matches("fettinnhold_percent|year|lsi|vekt_g|alder|pcb"), .fn = ~ as.numeric(.x))) %>%
rename_with(
~ str_replace_all(.x, c(
"w_w" = "ww",
"^s" = "",
"fettinnhold_percent" = "fat_pc",
"lsi_percent" = "lsi_pc"
)) %>%
str_to_lower(),
.cols = !matches("species")
) %>%
# remove obs with no POP data
filter(if_any(matches("ww|pcb"), ~ !is.na(.x))) %>%
pivot_longer(cols = matches("ww|pcb"), names_to = "variable", values_to = "value") %>%
# update two values found to be incorrectly entered
mutate(
hel_vekt_g = if_else(hel_vekt_g == 635 & lever_vekt_g == 151.9, 6135, hel_vekt_g),
lsi_pc = if_else(hel_vekt_g == 6135 & lever_vekt_g == 151.9, lever_vekt_g / hel_vekt_g, lsi_pc),
hel_vekt_g = if_else(hel_vekt_g == 1004 & lever_vekt_g == 780.0, 10040, hel_vekt_g),
lsi_pc = if_else(hel_vekt_g == 10040 & lever_vekt_g == 780.0, lever_vekt_g / hel_vekt_g, lsi_pc),
)
pcb_hcb %>%
filter(variable %in% c("pcb6", "hcb_ww")) %>% ungroup() %>%
mutate(hline_leg = if_else(variable == "pcb6", 200, 10) %>%
as.numeric()) %>%
# unnest(data) %>%
# mutate(value = 10^value - 1) %>%
filter(value < 1300) %>%
ggplot(aes(year, value, group = year)) +
geom_jitter(width = 0.35, size = 0.3) +
geom_boxplot(outlier.shape = NA, width = 1.65) +
scale_x_continuous(breaks = c(2019, 2016, 2013, 2011, 2008, 2005, 2010)) +
stat_summary(fun.y = mean, geom = "point", shape = 3, size = 1.8, color = "red", fill = "red") +
geom_hline(na.rm = TRUE, aes(yintercept = hline_leg), color = "red", linetype = "dashed") +
scale_y_continuous(breaks = pretty_breaks(n = 6))+
ylab("Concentration [\u00b5g/kg]") +
xlab("Year") +
facet_grid(variable ~ species, scales = "free")
ggsave("hcb_pcb_reg.svg", width = 6, height = 3.5)
p_man %>%
filter(variable %in% c("DDT", "HCH"),
species == "Haddock") %>% unnest(data) %>%
mutate(value = 10^value - 1) %>%
ggplot(aes(lsi_pc, value))+
geom_point(size = 0.5) +
geom_smooth(method = "lm", color = "red", size = 0.75) +
theme(axis.text.x = element_text(angle = 0, hjust = 0.5))+
facet_wrap(~variable, scales = "free") +
ylab("Concentration [\u00b5g/kg]") +
xlab("LSI [%]")
ggsave("ddt_hch_bivar.svg", width = 9, height = 3)
cleaned_data_w_age %>%
mutate(
variable = toupper(variable) %>% str_replace_all("_WW", "")
) %>%
filter(variable %in% c("DDT", "PCB7"),
species == "Cod") %>%
mutate(value = 10^value - 1) %>%
ggplot(aes(alder_y, value))+
geom_point(size = 0.5) +
geom_smooth(method = "lm", color = "red", size = 0.75) +
scale_x_continuous(breaks = pretty_breaks(n = 5))+
theme(axis.text.x = element_text(angle = 0, hjust = 0.5))+
facet_wrap(~factor(variable, levels = c(
"PCB7",
"DDT",
"HCH",
"HCB",
"TNC",
"PBDE"
)), scales = "free",
) +
ylab("Concentration [\u00b5g/kg]") +
xlab("Age [years]")
ggsave("pcb_ddt_age.svg", width = 9, height = 3)
Assuming the least square estimate of the covariates \(X_{1}\), \(X_{2}\) and \(X_{3}\) and \(y_i\) and \(\epsilon_i\) is the response and residual for the i-th point:
\[ y_i = f_1(x_{i1}) + f_2(x_{i2}) + f_3(x_{i3}) + \epsilon_i. \]
The partial residual of \(X_3\) for the i-th observation is then generally defined as (Sohil et al., 2021) (Larsen and McCleary, 1972):
\[ r_i = y_i-f_1\left(x_{i 1}\right)-f_2\left(x_{i 2}\right) = \epsilon_i + f_3(x_{i3}) , \]
where in the present case
\[ y_i = \hat b_0 + \hat b_{year}(year_{i})+ \hat b_{length}(length_{i})+\hat b_{fat pc}(fatpc_{i})+ \hat b_{lsi pc}(lsi pc_{i})+ \epsilon_i \] or, similarly
\[ y_i = \bar y + \hat b_{year}\cdot (x_{year}-\bar x_{year}) + \hat b_{length}\cdot (x_{length}-\bar x_{length}) + \hat b_{fatpc}\cdot (x_{fatpc}-\bar x_{fatpc}) + \hat b_{lsipc}\cdot (x_{lsipc}-\bar x_{lsipc}) + \epsilon_i \]
p_man %>%
unnest(tidy_coeff, partial_resids, data, normal_resids, augment) %>%
ungroup() %>%
ggplot(aes(year, (value_normal_resid + year * year_coeff + intercept_coeff + med_length_cm * length_cm_coeff + med_fat_pc * fat_pc_coeff + med_lsi_pc * lsi_pc_coeff))) +
geom_jitter(size = 0.3, na.rm = T, height = 0, width = 0.25) +
# geom_point(aes(year, value), size = 0.2, color = "darkgrey", position = position_jitternudge(width = 0.2, height = 0,
# seed = 123, x = 0.5,
# direction = "as.is",
# nudge.from = "jittered"))+
### TEST COND PART RESID
# geom_point(aes(year, value_normal_resid + mean_value +
# year_coeff*(year - mean_year) + length_cm_coeff*(med_length_cm - mean_length_cm) + fat_pc_coeff*(med_fat_pc - mean_fat_pc)), size = 0.2, color = "orange", position = position_jitternudge(width = 0.2, height = 0,
# seed = 123, x = -0.5,
# direction = "as.is",
# nudge.from = "jittered"))+
### TEST COND PART RESID END
stat_smooth(method = "lm", na.rm = TRUE, se = FALSE, color = "red", size = 0.5) +
# conditional coloring percentage change
geom_text(
data = p_man %>% group_by(species, variable) %>%
distinct(year_estimate, .keep_all = TRUE),
color =
case_when(
p_man %>% group_by(species, variable) %>%
distinct(year_estimate) %>% pull(year_estimate) > 0 &
p_man %>%
group_by(species, variable) %>%
distinct(year_pval) %>%
pull(year_pval) < 0.05 ~ "red",
p_man %>% group_by(species, variable) %>%
distinct(year_estimate) %>% pull(year_estimate) < 0 &
p_man %>%
group_by(species, variable) %>%
distinct(year_pval) %>%
pull(year_pval) < 0.05 ~ "green",
TRUE ~ "black"
),
fontface = "bold",
x = -Inf, y = Inf, hjust = -0.075, vjust = 1.5,
size = p_man %>% group_by(species, variable) %>%
distinct(year_estimate) %>% pull(year_estimate) %>% as.numeric() %>% abs()*150/14 + 2.3,
# size = 3.7,
aes(label = paste0(round(year_estimate %>% as.numeric() * 100, digits = 1) %>% number(style_positive = "plus", accuracy = 0.1), "%"))
)+
# p-values
geom_text(
data = p_man %>% group_by(species, variable) %>%
distinct(year_pval, .keep_all = TRUE), color = "grey30",
x = 2004.9, y = 0.1, hjust = 0, vjust = 0.6, size = 2.5,
aes(label = paste0("p = ", round(year_pval %>% as.numeric(), digits = 3) %>% format(nsmall = 2)))
) +
# geom_text(
# data = p_man %>% unnest(tidy) %>% group_by(species, variable) %>%
# distinct(year_coeff, .keep_all = TRUE), color = "red",
# x = -Inf, y = Inf, hjust = -0.1, vjust = 3, size = 2.5,
# aes(label = paste0("slope = ", round(year_coeff %>% as.numeric(), digits = 4)))
# ) +
scale_x_continuous(breaks = c(2019, 2016, 2013, 2011, 2008, 2005, 2010)) +
scale_y_continuous(breaks = c(0, 0.5, 1, 1.5, 2, 2.5, 3), limits = c(0, 3.2)) +
ylab("log(Concentration + 1) [\u00b5g/kg]") +
xlab("Year | (Length, Fat and LSI)") +
facet_grid(species ~ factor(variable, levels = c(
"PCB7",
"DDT",
"HCH",
"HCB",
"TNC",
"PBDE"
)))
ggsave("conditional_pr_stepan_data.svg", width = 11, height = 6)
# comditional jtools and visreg
# p_man$visreg[[18]] + scale_y_continuous(breaks = pretty_breaks(n = 10))
# p_man$effect_plot[[18]]
# patchwork::wrap_plots(p_man$visreg)
p_man %>% clean_names() %>%
select(species, variable, tidy) %>%
unnest(tidy) %>%
filter(term == "year") %>%
select(species, variable, p.value, estimate) %>%
mutate(estimate = 10^estimate - 1) %>%
gt(caption = "Percentage decrease per year.",
groupname_col = "variable",
rowname_col = "species") %>%
tab_style(
location = cells_body(
columns = estimate,
rows = estimate > 0 & p.value < 0.05
),
style = list(cell_text(color = "red"))
) %>%
tab_style(
location = cells_body(
columns = estimate,
rows = estimate < 0 & p.value < 0.05
),
style = list(cell_text(color = "green"))
) %>%
fmt_number(everything(), decimals = 3) %>%
fmt_percent(estimate, decimals = 1) %>%
tab_footnote("Estimated annual percentage change obtained by exponentiation and corresponding p-values. Green color represents a significant decrease at the p = 0.05 level, while red color indicates a significant increase.")
| p.value | estimate | |
|---|---|---|
| PCB7 | ||
| Cod | 0.039 | −1.5% |
| Haddock | 0.000 | −6.8% |
| Saithe | 0.000 | −3.0% |
| DDT | ||
| Cod | 0.055 | −1.6% |
| Haddock | 0.000 | −8.6% |
| Saithe | 0.000 | −4.5% |
| HCH | ||
| Cod | 0.017 | −0.9% |
| Haddock | 0.000 | −2.7% |
| Saithe | 0.000 | −2.8% |
| HCB | ||
| Cod | 0.020 | 1.1% |
| Haddock | 0.000 | 1.8% |
| Saithe | 0.035 | −0.8% |
| TNC | ||
| Cod | 0.394 | −0.7% |
| Haddock | 0.000 | −4.9% |
| Saithe | 0.000 | −3.5% |
| PBDE | ||
| Cod | 0.000 | −6.8% |
| Haddock | 0.000 | −12.9% |
| Saithe | 0.000 | −15.9% |
| Estimated annual percentage change obtained by exponentiation and corresponding p-values. Green color represents a significant decrease at the p = 0.05 level, while red color indicates a significant increase. | ||
p_man %>% clean_names() %>%
select(species, variable, tidy) %>%
unnest(tidy) %>%
filter(term == "lsi_pc") %>%
select(species, variable, p.value, estimate) %>%
mutate(estimate = 10^estimate - 1) %>%
gt(caption = "Percentage change per percentage increase in lsi",
groupname_col = "variable",
rowname_col = "species") %>%
tab_style(
location = cells_body(
columns = estimate,
rows = estimate > 0 & p.value < 0.05
),
style = list(cell_text(color = "red"))
) %>%
tab_style(
location = cells_body(
columns = estimate,
rows = estimate < 0 & p.value < 0.05
),
style = list(cell_text(color = "green"))
) %>%
fmt_number(everything(), decimals = 3) %>%
fmt_percent(estimate, decimals = 1) %>%
tab_footnote("Estimated percentage change per percent LSI increase obtained by exponentiation and corresponding p-values. Green color represents a significant decrease at the p = 0.05 level, while red color indicates a significant increase.")
| p.value | estimate | |
|---|---|---|
| PCB7 | ||
| Cod | 0.000 | −9.4% |
| Haddock | 0.000 | −19.2% |
| Saithe | 0.019 | −2.7% |
| DDT | ||
| Cod | 0.000 | −10.6% |
| Haddock | 0.000 | −16.7% |
| Saithe | 0.070 | −2.2% |
| HCH | ||
| Cod | 0.206 | 1.5% |
| Haddock | 0.000 | 3.6% |
| Saithe | 0.544 | −0.2% |
| HCB | ||
| Cod | 0.000 | −8.4% |
| Haddock | 0.000 | −5.3% |
| Saithe | 0.000 | −3.4% |
| TNC | ||
| Cod | 0.000 | −11.0% |
| Haddock | 0.000 | −14.1% |
| Saithe | 0.007 | −3.1% |
| PBDE | ||
| Cod | 0.001 | −11.1% |
| Haddock | 0.000 | 14.3% |
| Saithe | 0.002 | −3.6% |
| Estimated percentage change per percent LSI increase obtained by exponentiation and corresponding p-values. Green color represents a significant decrease at the p = 0.05 level, while red color indicates a significant increase. | ||
p_man %>% clean_names() %>%
select(species, variable, tidy) %>%
unnest(tidy) %>%
filter(term == "fat_pc") %>%
select(species, variable, p.value, estimate) %>%
mutate(estimate = 10^estimate - 1) %>%
gt(caption = "Percentage change per percentage increase in fat",
groupname_col = "variable",
rowname_col = "species") %>%
tab_style(
location = cells_body(
columns = estimate,
rows = estimate > 0 & p.value < 0.05
),
style = list(cell_text(color = "red"))
) %>%
tab_style(
location = cells_body(
columns = estimate,
rows = estimate < 0 & p.value < 0.05
),
style = list(cell_text(color = "green"))
) %>%
fmt_number(everything(), decimals = 3) %>%
fmt_percent(estimate, decimals = 1) %>%
tab_footnote("Estimated percentage change per percent fat increase obtained by exponentiation and corresponding p-values. Green color represents a significant decrease at the p = 0.05 level, while red color indicates a significant increase.")
| p.value | estimate | |
|---|---|---|
| PCB7 | ||
| Cod | 0.000 | 0.8% |
| Haddock | 0.170 | 0.4% |
| Saithe | 0.048 | −0.5% |
| DDT | ||
| Cod | 0.000 | 1.3% |
| Haddock | 0.000 | 1.7% |
| Saithe | 0.041 | −0.5% |
| HCH | ||
| Cod | 0.000 | 1.5% |
| Haddock | 0.000 | 0.9% |
| Saithe | 0.000 | 1.1% |
| HCB | ||
| Cod | 0.000 | 2.8% |
| Haddock | 0.000 | 1.9% |
| Saithe | 0.000 | 1.4% |
| TNC | ||
| Cod | 0.000 | 1.4% |
| Haddock | 0.000 | 1.3% |
| Saithe | 0.047 | −0.5% |
| PBDE | ||
| Cod | 0.000 | 1.2% |
| Haddock | 0.741 | −0.1% |
| Saithe | 0.959 | −0.0% |
| Estimated percentage change per percent fat increase obtained by exponentiation and corresponding p-values. Green color represents a significant decrease at the p = 0.05 level, while red color indicates a significant increase. | ||
cleaned_data_notrans <- raw_data %>%
# log transform covariate length
mutate(across(.cols = matches("w_w"), .fn = ~ as.numeric(.x))) %>%
mutate(across(.cols = matches("length_cm"), .fn = ~ log10(as.numeric(.x)))) %>%
mutate(across(.cols = matches("fettinnhold_percent|year|lsi|vekt_g"), .fn = ~ as.numeric(.x))) %>%
rename_with(
~ str_replace_all(.x, c(
"w_w" = "ww",
"^s" = "",
"fettinnhold_percent" = "fat_pc",
"lsi_percent" = "lsi_pc"
)) %>%
str_to_lower(),
.cols = !matches("species")
) %>%
# remove obs with no POP data
filter(if_any(matches("ww"), ~ !is.na(.x))) %>%
pivot_longer(cols = matches("ww"), names_to = "variable", values_to = "value") %>%
# update two values found to be incorrectly entered
mutate(
hel_vekt_g = if_else(hel_vekt_g == 635 & lever_vekt_g == 151.9, 6135, hel_vekt_g),
lsi_pc = if_else(hel_vekt_g == 6135 & lever_vekt_g == 151.9, lever_vekt_g / hel_vekt_g, lsi_pc),
hel_vekt_g = if_else(hel_vekt_g == 1004 & lever_vekt_g == 780.0, 10040, hel_vekt_g),
lsi_pc = if_else(hel_vekt_g == 10040 & lever_vekt_g == 780.0, lever_vekt_g / hel_vekt_g, lsi_pc),
)
p_man_notrans <- cleaned_data_notrans %>%
ungroup() %>%
group_by(species, variable) %>%
drop_na(value) %>%
nest() %>%
# apply model and calculate medians of covariates
mutate(
model = map(data, pop_lm),
med_length_cm = map(
data,
~ median(.x$length_cm, na.rm = TRUE)
) %>% as.numeric(),
med_fat_pc = map(
data,
~ median(.x$fat_pc, na.rm = TRUE)
) %>% as.numeric(),
med_lsi_pc = map(
data,
~ median(.x$lsi_pc, na.rm = TRUE)
) %>% as.numeric(),
mean_length_cm = map(
data,
~ mean(.x$length_cm, na.rm = TRUE)
) %>% as.numeric(),
mean_fat_pc = map(
data,
~ mean(.x$fat_pc, na.rm = TRUE)
) %>% as.numeric(),
mean_lsi_pc = map(
data,
~ mean(.x$lsi_pc, na.rm = TRUE)
) %>% as.numeric(),
mean_year = map(
data,
~ mean(.x$year, na.rm = TRUE)
) %>% as.numeric(),
mean_value = map(
data,
~ mean(.x$value, na.rm = TRUE)
) %>% as.numeric()
) %>%
mutate(
glance = map(model, broom::glance),
year_pval = map(model, ~ broom::tidy(.x)[[2, 5]] %>% as.numeric()),
year_estimate = map(model, ~ broom::tidy(.x)[[2, 2]] %>% as.numeric() %>% {10^. - 1}),
year_estimate_nolog = map(model, ~ broom::tidy(.x)[[2, 2]] %>% as.numeric()),
# extracting model coefficients for partial residual calculation
tidy_coeff = map(model, ~ broom::tidy(.x) %>%
select(term, estimate) %>%
pivot_wider(names_from = "term", values_from = "estimate") %>%
clean_names() %>%
rename_with(.cols = everything(), .fn = ~ paste0(.x, "_coeff"))),
# tidy gets summary variables of the model coefficients
tidy = map(model, ~ broom::tidy(.x)),
# augment gets per observation variables (e.g. residuals)
augment = map2(model, data, ~ broom::augment(.x, .y)),
partial_resids = map(model, ~ residuals(.x, type = "partial") %>%
as_tibble() %>%
rename_with(.cols = everything(), .fn = ~ paste0(.x, "_partial_resid"))),
normal_resids = map(model, ~ residuals(.x) %>%
as_tibble() %>%
rename_with(.cols = everything(), .fn = ~ paste0(.x, "_normal_resid")))
) %>%
# add package-generated plots for an extra check
mutate(
visreg = map(
model,
~ visreg(.x, "year", gg = TRUE, type = "conditional")
),
effect_plot = map(
model,
~ effect_plot(.x, pred = year, interval = TRUE, partial.residuals = TRUE)
),
variable = toupper(variable) %>% str_replace_all("_WW", ""),
) %>%
# vifs to check for collinearity
mutate(
vifs = map(model, ~ car::vif(.x) %>% as_tibble())
)
p_man_notrans %>%
unnest(tidy_coeff, partial_resids, data, normal_resids, augment) %>%
ungroup() %>%
ggplot(aes(year, (value_normal_resid + year * year_coeff + intercept_coeff + med_length_cm * length_cm_coeff + med_fat_pc * fat_pc_coeff + med_lsi_pc * lsi_pc_coeff))) +
geom_jitter(size = 0.3, na.rm = T, height = 0, width = 0.25) +
# geom_point(aes(year, value), size = 0.2, color = "darkgrey", position = position_jitternudge(width = 0.2, height = 0,
# seed = 123, x = 0.5,
# direction = "as.is",
# nudge.from = "jittered"))+
### TEST COND PART RESID
# geom_point(aes(year, value_normal_resid + mean_value +
# year_coeff*(year - mean_year) + length_cm_coeff*(med_length_cm - mean_length_cm) + fat_pc_coeff*(med_fat_pc - mean_fat_pc)), size = 0.2, color = "orange", position = position_jitternudge(width = 0.2, height = 0,
# seed = 123, x = -0.5,
# direction = "as.is",
# nudge.from = "jittered"))+
### TEST COND PART RESID END
geom_smooth(method = "lm", color = "orange")+
theme_bw() +
theme(
axis.text.x = element_text(angle = 90, hjust = 1),
legend.key = element_blank(),
strip.background = element_rect(colour="black", fill="white"),
strip.text.x = element_text(face = "bold"),
strip.text.y = element_text(face = "bold")
) +
scale_x_continuous(breaks = c(2019, 2016, 2013, 2011, 2008, 2005, 2010)) +
ylab("Concentration [\u00b5g/kg]") +
xlab("Year | (Length, Fat and LSI)") +
facet_grid2(species ~ factor(variable, levels = c(
"PCB7",
"DDT",
"HCH",
"HCB",
"TNC",
"PBDE"
)), scales = "free_y", independent = "y")
# Cod
Cod_reg_tab <- rbind(
p_man %>%
filter(species == "Cod") %>%
ungroup() %>%
select(variable, model, tidy, glance, augment) %>%
unnest(tidy) %>%
select(-model, -glance, -augment) %>%
select(-std.error, -statistic) %>%
pivot_longer(cols = c(estimate, p.value), names_to = "property") %>%
pivot_wider(names_from = variable, values_from = value) %>%
mutate(
term = str_replace_all(term, c(
"year" = "Year",
"length_cm" = "Length",
"fat_pc" = "Fat %",
"lsi_pc" = "LSI %"
))
) %>%
filter(!str_detect(term, "tercept")) %>%
ungroup(),
p_man %>%
filter(species == "Cod") %>%
ungroup() %>%
select(variable, model, tidy, glance, augment) %>%
unnest(glance) %>%
select(-model, -tidy, -augment) %>%
mutate(term = "Model") %>%
select(variable, term, adj.r.squared, statistic, p.value, nobs) %>%
rename(n_obs = nobs, F.statistic = statistic) %>%
pivot_longer(cols = -c(variable, term), names_to = "property") %>%
pivot_wider(names_from = "variable", values_from = "value")
) %>%
gt(
caption = "Regression summary table for Cod, bold indicating non-significant coefficients.",
groupname_col = "term",
rowname_col = "property"
) %>%
fmt_number(
columns = matches("[A-Z]{2}"),
decimals = 3
) %>%
fmt_number(
columns = matches("[A-Z]{2}"),
rows = property == "n_obs",
decimals = 0
) %>%
fmt_number(
columns = matches("[A-Z]{2}"),
rows = property == "F.statistic",
decimals = 0
) %>%
tab_style(
location = cells_body(
columns = PCB7,
rows = property == "p.value" & (PCB7 > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
location = cells_body(
columns = DDT,
rows = property == "p.value" & (DDT > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
location = cells_body(
columns = HCH,
rows = property == "p.value" & (HCH > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
location = cells_body(
columns = HCB,
rows = property == "p.value" & (HCB > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
location = cells_body(
columns = TNC,
rows = property == "p.value" & (TNC > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
location = cells_body(
columns = PBDE,
rows = property == "p.value" & (PBDE > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_column_labels(
columns = everything()
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_row_groups(groups = everything())
) %>% tab_header("Cod")
Cod_reg_tab
| Cod | ||||||
| PCB7 | DDT | HCH | HCB | TNC | PBDE | |
|---|---|---|---|---|---|---|
| Year | ||||||
| estimate | −0.007 | −0.007 | −0.004 | 0.005 | −0.003 | −0.031 |
| p.value | 0.039 | 0.055 | 0.017 | 0.020 | 0.394 | 0.000 |
| Length | ||||||
| estimate | 0.787 | 1.225 | −0.012 | 0.296 | 0.933 | 0.725 |
| p.value | 0.000 | 0.000 | 0.831 | 0.000 | 0.000 | 0.000 |
| Fat % | ||||||
| estimate | 0.004 | 0.006 | 0.007 | 0.012 | 0.006 | 0.005 |
| p.value | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
| LSI % | ||||||
| estimate | −0.043 | −0.049 | 0.006 | −0.038 | −0.051 | −0.051 |
| p.value | 0.000 | 0.000 | 0.206 | 0.000 | 0.000 | 0.001 |
| Model | ||||||
| adj.r.squared | 0.228 | 0.399 | 0.662 | 0.715 | 0.303 | 0.307 |
| F.statistic | 20 | 43 | 126 | 161 | 26 | 24 |
| p.value | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
| n_obs | 256 | 256 | 256 | 256 | 232 | 206 |
gtsave(Cod_reg_tab, "Cod_reg_tab.docx")
# Haddock
Haddock_reg_tab <- rbind(
p_man %>%
filter(species == "Haddock") %>%
ungroup() %>%
select(variable, model, tidy, glance, augment) %>%
unnest(tidy) %>%
select(-model, -glance, -augment) %>%
select(-std.error, -statistic) %>%
pivot_longer(cols = c(estimate, p.value), names_to = "property") %>%
pivot_wider(names_from = variable, values_from = value) %>%
mutate(
term = str_replace_all(term, c(
"year" = "Year",
"length_cm" = "Length",
"fat_pc" = "Fat %",
"lsi_pc" = "LSI %"
))
) %>%
filter(!str_detect(term, "tercept")) %>%
ungroup(),
p_man %>%
filter(species == "Haddock") %>%
ungroup() %>%
select(variable, model, tidy, glance, augment) %>%
unnest(glance) %>%
select(-model, -tidy, -augment) %>%
mutate(term = "Model") %>%
select(variable, term, adj.r.squared, statistic, p.value, nobs) %>%
rename(n_obs = nobs, F.statistic = statistic) %>%
pivot_longer(cols = -c(variable, term), names_to = "property") %>%
pivot_wider(names_from = "variable", values_from = "value")
) %>%
gt(
caption = "Regression summary table for Haddock, bold indicating non-significant coefficients.",
groupname_col = "term",
rowname_col = "property"
) %>%
fmt_number(
columns = matches("[A-Z]{2}"),
decimals = 3
) %>%
fmt_number(
columns = matches("[A-Z]{2}"),
rows = property == "n_obs",
decimals = 0
) %>%
fmt_number(
columns = matches("[A-Z]{2}"),
rows = property == "F.statistic",
decimals = 0
) %>%
tab_style(
location = cells_body(
columns = PCB7,
rows = property == "p.value" & (PCB7 > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
location = cells_body(
columns = DDT,
rows = property == "p.value" & (DDT > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
location = cells_body(
columns = HCH,
rows = property == "p.value" & (HCH > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
location = cells_body(
columns = HCB,
rows = property == "p.value" & (HCB > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
location = cells_body(
columns = TNC,
rows = property == "p.value" & (TNC > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
location = cells_body(
columns = PBDE,
rows = property == "p.value" & (PBDE > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_column_labels(
columns = everything()
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_row_groups(groups = everything())
) %>% tab_header("Haddock")
Haddock_reg_tab
| Haddock | ||||||
| PCB7 | DDT | HCH | HCB | TNC | PBDE | |
|---|---|---|---|---|---|---|
| Year | ||||||
| estimate | −0.031 | −0.039 | −0.012 | 0.008 | −0.022 | −0.060 |
| p.value | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
| Length | ||||||
| estimate | 1.756 | 0.653 | 0.058 | 0.051 | 1.067 | 1.892 |
| p.value | 0.000 | 0.030 | 0.346 | 0.573 | 0.000 | 0.000 |
| Fat % | ||||||
| estimate | 0.002 | 0.007 | 0.004 | 0.008 | 0.005 | −0.001 |
| p.value | 0.170 | 0.000 | 0.000 | 0.000 | 0.000 | 0.741 |
| LSI % | ||||||
| estimate | −0.093 | −0.079 | 0.015 | −0.023 | −0.066 | 0.058 |
| p.value | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
| Model | ||||||
| adj.r.squared | 0.370 | 0.209 | 0.716 | 0.498 | 0.201 | 0.380 |
| F.statistic | 50 | 23 | 211 | 83 | 22 | 44 |
| p.value | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
| n_obs | 334 | 334 | 334 | 334 | 334 | 279 |
gtsave(Haddock_reg_tab, "Haddock_reg_tab.docx")
# Saithe
Saithe_reg_tab <- rbind(
p_man %>%
filter(species == "Saithe") %>%
ungroup() %>%
select(variable, model, tidy, glance, augment) %>%
unnest(tidy) %>%
select(-model, -glance, -augment) %>%
select(-std.error, -statistic) %>%
pivot_longer(cols = c(estimate, p.value), names_to = "property") %>%
pivot_wider(names_from = variable, values_from = value) %>%
mutate(
term = str_replace_all(term, c(
"year" = "Year",
"length_cm" = "Length",
"fat_pc" = "Fat %",
"lsi_pc" = "LSI %"
))
) %>%
filter(!str_detect(term, "tercept")) %>%
ungroup(),
p_man %>%
filter(species == "Saithe") %>%
ungroup() %>%
select(variable, model, tidy, glance, augment) %>%
unnest(glance) %>%
select(-model, -tidy, -augment) %>%
mutate(term = "Model") %>%
select(variable, term, adj.r.squared, statistic, p.value, nobs) %>%
rename(n_obs = nobs, F.statistic = statistic) %>%
pivot_longer(cols = -c(variable, term), names_to = "property") %>%
pivot_wider(names_from = "variable", values_from = "value")
) %>%
gt(
caption = "Regression summary table for Saithe, bold indicating non-significant coefficients.",
groupname_col = "term",
rowname_col = "property"
) %>%
fmt_number(
columns = matches("[A-Z]{2}"),
decimals = 3
) %>%
fmt_number(
columns = matches("[A-Z]{2}"),
rows = property == "n_obs",
decimals = 0
) %>%
fmt_number(
columns = matches("[A-Z]{2}"),
rows = property == "F.statistic",
decimals = 0
) %>%
tab_style(
location = cells_body(
columns = PCB7,
rows = property == "p.value" & (PCB7 > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
location = cells_body(
columns = DDT,
rows = property == "p.value" & (DDT > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
location = cells_body(
columns = HCH,
rows = property == "p.value" & (HCH > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
location = cells_body(
columns = HCB,
rows = property == "p.value" & (HCB > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
location = cells_body(
columns = TNC,
rows = property == "p.value" & (TNC > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
location = cells_body(
columns = PBDE,
rows = property == "p.value" & (PBDE > 0.05)
),
style = list(cell_text(weight = "bold"))
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_column_labels(
columns = everything()
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_row_groups(groups = everything())
) %>% tab_header("Saithe")
Saithe_reg_tab
| Saithe | ||||||
| PCB7 | DDT | HCH | HCB | TNC | PBDE | |
|---|---|---|---|---|---|---|
| Year | ||||||
| estimate | −0.013 | −0.020 | −0.012 | −0.003 | −0.016 | −0.075 |
| p.value | 0.000 | 0.000 | 0.000 | 0.035 | 0.000 | 0.000 |
| Length | ||||||
| estimate | 1.555 | 1.565 | 0.045 | 0.381 | 1.619 | 1.902 |
| p.value | 0.000 | 0.000 | 0.367 | 0.000 | 0.000 | 0.000 |
| Fat % | ||||||
| estimate | −0.002 | −0.002 | 0.005 | 0.006 | −0.002 | 0.000 |
| p.value | 0.048 | 0.041 | 0.000 | 0.000 | 0.047 | 0.959 |
| LSI % | ||||||
| estimate | −0.012 | −0.010 | −0.001 | −0.015 | −0.014 | −0.016 |
| p.value | 0.019 | 0.070 | 0.544 | 0.000 | 0.007 | 0.002 |
| Model | ||||||
| adj.r.squared | 0.306 | 0.345 | 0.644 | 0.301 | 0.342 | 0.785 |
| F.statistic | 34 | 40 | 134 | 33 | 39 | 180 |
| p.value | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
| n_obs | 296 | 296 | 296 | 296 | 296 | 197 |
gtsave(Saithe_reg_tab, "Saithe_reg_tab.docx")
# p_man %>%
# unnest(tidy_coeff, partial_resids, data, normal_resids, augment) %>%
# ungroup() %>%
# ggplot(aes(year, (value_normal_resid + year * year_coeff + intercept_coeff + med_length_cm * length_cm_coeff + med_fat_pc * fat_pc_coeff + med_lsi_pc * lsi_pc_coeff))) +
# geom_jitter(size = 0.3, na.rm = T, height = 0, width = 0.25) +
# stat_smooth(method = "lm", na.rm = TRUE, se = FALSE, color = "red", size = 0.5) +
# # conditional coloring percentage change
# geom_text(
# data = p_man %>% group_by(species, variable) %>%
# distinct(year_estimate, .keep_all = TRUE),
# color =
# case_when(
# p_man %>% group_by(species, variable) %>%
# distinct(year_estimate) %>% pull(year_estimate) > 0 &
# p_man %>%
# group_by(species, variable) %>%
# distinct(year_pval) %>%
# pull(year_pval) < 0.05 ~ "red",
# p_man %>% group_by(species, variable) %>%
# distinct(year_estimate) %>% pull(year_estimate) < 0 &
# p_man %>%
# group_by(species, variable) %>%
# distinct(year_pval) %>%
# pull(year_pval) < 0.05 ~ "green",
# TRUE ~ "black"
# ),
# fontface = "bold",
# x = -Inf, y = Inf, hjust = -0.075, vjust = 1.5,
# size = p_man %>% group_by(species, variable) %>%
# distinct(year_estimate) %>% pull(year_estimate) %>% as.numeric() %>% abs()*150/14 + 2.3,
# # size = 3.7,
# aes(label = paste0(round(year_estimate %>% as.numeric() * 100, digits = 1) %>% number(style_positive = "plus", accuracy = 0.1), "%"))
# )+
# # p-values
# geom_text(
# data = p_man %>% group_by(species, variable) %>%
# distinct(year_pval, .keep_all = TRUE), color = "grey30",
# x = 2004.9, y = 0.1, hjust = 0, vjust = 0.6, size = 2.5,
# aes(label = paste0("p = ", round(year_pval %>% as.numeric(), digits = 3) %>% format(nsmall = 2)))
# ) +
#
# scale_x_continuous(breaks = c(2019, 2016, 2013, 2011, 2008, 2005, 2010)) +
# scale_y_continuous(breaks = c(0, 0.5, 1, 1.5, 2, 2.5, 3), limits = c(0, 3.2)) +
# ylab("log(Concentration + 1) [\u00b5g/kg]") +
# xlab("Year | (Length, Fat and LSI)") +
#
# theme_bw() +
# theme(
# axis.text.x = element_text(angle = 90, hjust = 1),
# legend.key = element_blank(),
# strip.background = element_rect(colour="black", fill="white"),
# strip.text.x = element_text(face = "bold"),
# strip.text.y = element_text(face = "bold"))+
#
# facet_grid(species ~ factor(variable, levels = c(
# "PCB7",
# "DDT",
# "HCH",
# "HCB",
# "TNC",
# "PBDE"
# )))
#
# ggsave("graphical_abstr.svg", width = 11, height = 6)
library(ggrepel)
library(see)
piedata <- read_excel("data/stomach_all_haddock_and_cod_all_stations_2011.xlsx", skip = 2) %>%
select(1, Haddock, Cod) %>%
rename(c("Group" = 1)) %>%
pivot_longer(cols = -Group, names_to = "Species") %>%
group_by(Species) %>%
mutate(value = value / sum(value)) %>%
mutate(
csum = rev(cumsum(rev(value))),
pos = value / 2 + lead(csum, 1),
pos = if_else(is.na(pos), value / 2, pos),
group_id = 15-row_number()
)
piedata %>%
mutate(Group = fct_reorder(Group, group_id)) %>%
ggplot(aes(
x = "",
y = value,
fill = Group
)) +
geom_col() +
coord_polar(theta = "y") +
# guides(fill = guide_legend(title = "Group")) +
# theme_void() +
facet_wrap(~Species)
ggsave("stommach_pie.svg", width = 9, height = 6)
Visualizations in the manuscript and statistical analyses for the determination of temporal trends were carried out in R version 4.2.1. Data with missing dependent or response variables were removed, and response variables were added one prior to log-transformation with base 10 to meet assumptions of homoscedasticity and normality while preserving zero-valued observations. The dependent variable length was also log-transformed. To correct for known confounders, a multiple linear regression model with the dependent variables year, fish length, liver fat content and liver somatic index (LSI) was fitted to the cleaned and transformed data for each POP and species. To visualize the effect of year on the POP concentrations corrected for the other dependent variables, the conditional partial residuals versus year were plotted, keeping length, fat and LSI constant at their median values. The significance of the regression coefficient for the year was determined using a Wald test to test the null hypothesis of no time trend. As detailed in the Supplementary Materials, model validation revealed some deviations from the model assumptions. Thus, marginally significant effects were interpreted with caution. More details regarding the statistical model are given in Supplementary Materials.
Sohil, F., Sohali, M.U., Shabbir, J., 2021. An introduction to statistical learning with applications in R: by Gareth James, Daniela Witten, Trevor Hastie, and Robert Tibshirani, New York, Springer Science and Business Media, 2013, $41.98, eISBN: 978-1-4614-7137-7. Statistical Theory and Related Fields 1–1. https://doi.org/10.1080/24754269.2021.1980261
Larsen, W.A., McCleary, S.J., 1972. The Use of Partial Residual Plots in Regression Analysis. Technometrics 14, 781–790. https://doi.org/10/gp2cxn